# compute LNG
# -------------------------------------------------------------------- #
# prelims ####
# -------------------------------------------------------------------- #

# clear environment
rm(list = ls())

# libraries
library(haven)
library(rgdal)
library(raster)
library(readxl) 
library(classInt)
library(GISTools)
library(tidyverse)
library(units)
library(reldist)
library(sf)
library(doMC)
library(foreach)
library(parallel)

# cores
nCores <- as.numeric(Sys.getenv("NSLOTS"))
if (is.na(nCores))nCores=1

paste("Running code using",nCores,"cores") 
registerDoMC(nCores)

# set seed
set.seed(123)

# paths
dir <- paste0("PATH_TO_MAIN_DIRECTORY_HERE")
dirdata <- paste0(dir,"data/int/")
dirlng <- paste0(dir,"data/int/lng/")
dirorig <- paste0(dir, "orig/catastro/")

# -------------------------------------------------------------------- #
# parameters ####
# -------------------------------------------------------------------- #

# List of cities
#city <- "08_900" # Barcelona
#city <- "28_900" # Madrid
#city <- "41_900" # Sevilla
#city <- "46_900" # Valencia
#city <- "50_900" # Zaragoza
#city <- "30_30" # Murcia
#city <- "07_40" # Palma Mallorca
#city <- "35_17" # Las Palmas
listcities <- c("08_900", "28_900", "08_900","28_900", "41_900","46_900","50_900",
                "30_30", "07_40", "35_17")

# LNG type
TYPE <- "VALUE"
#TYPE <- "SPACE"

# kernel
#KERNEL <- "UNIFORM" # all dwellings have the same weight
KERNEL <- "TRIANGULAR" # linear decay with distance (also computes uniform kernel)

# years
refyear <- 2019 
firstyear <- refyear
listyears <- c(firstyear, 2018, 2017, 2016, 2015)

# buffers - r parameter
firstbuff <- 100 # note: number has to be divisible by 50
listbuff <- c(firstbuff, 200, 350, 500, 750, 1000)

# -------------------------------------------------------------------- #
# part 1: lng city-year-buff -- BU cluster ####
# -------------------------------------------------------------------- #

## paths -------------------------------------------------------------
for(city in listcities){
  
  # keep track of progress
  print(paste0("Processing city ", city))
  print(Sys.time())
  
  if(city == "08_900"){ # Barcelona
    date <- "2020-04-24" 
  }else{ # other cities
    date <- "2019-02-07" 
    listyears <- 2019
    listbuff <- 100
    TYPE <- "SPACE"
  }

  dta_path <- paste0(dirdata, city,"_clus",".dta")
  gis_path <- paste0(dirorig, city, "_UH_", date, "_SHF/PARCELA/")
  price_path <- paste0(dirdata,"PredPriceForest_", city, ".dta")
  
## prepare data -----------------------------------------------------
# numerical data
NumData <- read_dta(dta_path) %>% 
    subset(House == 1| Apartment == 1) %>% 
    dplyr::select(c("PlotCode", "PropOrder2", "PropSqm", "YearConst")) %>% 
    subset(PropSqm > 10) %>%  
    distinct(PlotCode, PropOrder2, .keep_all = TRUE) 
  
# add predicted price - only barcelona
if(TYPE == "VALUE"){
  
  # prices
  PriceData <- read_dta(price_path)
  priceyear <- paste0("PHat", refyear)
  names(PriceData)[names(PriceData) == priceyear] <-"PHat"
  
  # add prices to numerical data
  NumData <- merge(x = NumData, y = PriceData[c("PlotCode", "PropOrder2", "PHat")], 
                   by = c("PlotCode", "PropOrder2"), all.x = FALSE, all.y = FALSE) 
  
  # inequality
  NumData$Val <- NumData$PHat
  NumData <- dplyr::select(NumData, -c(PHat))
  
  # check no missing
  print(colSums(is.na(NumData)))
  NumData <- NumData %>% na.omit(Val)
  
  # remove price data - save memory
  rm(PriceData)

}else if(TYPE == "SPACE"){
  NumData$Val <- NumData$PropSqm
}
  
# remove sqm - save memory
NumData <- dplyr::select(NumData, -c(PropSqm))

# load shape file
PlotShp <- st_read(stringsAsFactors = FALSE,
  dsn = path.expand(sprintf("%s", gis_path, "PARCELA"))
  , query="SELECT REFCAT FROM \"PARCELA\" WHERE FECHABAJA >= 30000000 AND PARCELA != '09000'"
)

# properties in numerical data
PlotShp_hs_apt <- PlotShp %>% subset(REFCAT %in% unique(NumData$PlotCode))

# centroids
PlotShp_hs_apt$cent <- st_centroid(PlotShp_hs_apt$geometry,TRUE)

# year of construction
PlotShp_hs_apt <- merge(x=PlotShp_hs_apt, y=NumData[c("PlotCode","YearConst")], 
                        by.x = "REFCAT", by.y = "PlotCode", all.x = FALSE, all.y = FALSE) %>%
  distinct(REFCAT, .keep_all = TRUE) 

# remove shape - save memory
rm(PlotShp)

# parcel identifier
PlotShp_hs_apt$rownum  <- seq.int(nrow(PlotShp_hs_apt)) 
NumData <- merge(x = NumData,
                 y = as.data.frame(PlotShp_hs_apt)[c("REFCAT","rownum")],
                 by.x = "PlotCode", by.y = "REFCAT", all.x = FALSE, all.y = FALSE)

## LNG -----------------------------------------------------

# buffers
for(buff in c(listbuff)){

BINS <- (buff/50) + 1
WEIGHTS <- rep(1, length = BINS)
BINS_HALF <- round(BINS/2) 
PLOTS <- nrow(PlotShp_hs_apt) 

# weights
if(KERNEL == "UNIFORM"){
  
  WEIGHTS <- replace(x = WEIGHTS, values = 1)
  
}else if(KERNEL == "TRIANGULAR"){
  
  WEIGHTS[1] <- 1
  
  for(j in 2:length(WEIGHTS)){
    WEIGHTS[j] <- 1 - ((50 * (j-1) - 25)/buff)
  }
}

# specific to 0 buffer
if(buff==0){
  WEIGHTS <- 1
}

# generate number of 'donuts' given buffer chosen
# these are the 'cut-off' points
DONUTS <- rep(buff, length = BINS)
for(j in 1:length(DONUTS)){
  DONUTS[j] <- 0 + ((50 * (j-1)))
}

# intersecting plots for each donut 
for(d in c(DONUTS)){
  
  # name of file
  PlotList <- paste0(city,"_interesect_result_diff_", d, ".RData")
  
    if(d==0){
      
      # get buffer
      intersect_result_diff_0 <- st_intersects(x = PlotShp_hs_apt$cent, y = PlotShp_hs_apt)
      
      for(row in 1:PLOTS){
        intersect_result_diff_0[[row]] <- row
      }
      
      # intersect previous results
      intersect_PREV <- intersect_result_diff_0 

    }else if(d>0){ # distance greater than 0
      
      # generate buffed cartography
      buffed <- st_buffer(x = PlotShp_hs_apt$cent, dist = d)
      # intersection
      intersect_result <- st_intersects(x = buffed, y = PlotShp_hs_apt)
      # copy to replace
      intersect_result_diff <- intersect_result
      # remove buffer
      rm(buffed)
      
      # get the difference
      for(row in 1:PLOTS){
        
        # difference with respect to previous buffer
        intersect_result_diff[[row]] <- setdiff(intersect_result[[row]],intersect_PREV[[row]])
        
      }
      
      # rename to previous intersection result
      intersect_PREV <- intersect_result
      newname <- paste0("intersect_result_diff_", d)
      assign(newname, intersect_result_diff)
      
      # last distance
      if(d == DONUTS[BINS]){
        intersect_result_max <- intersect_result
      }

    } 
} 
  
# generate matrix to store plots per bin
MATCHSTORED <- matrix(data = list(), nrow = PLOTS, ncol = (BINS*2)) 
MATCH_WEIGHT <- matrix(data = list(), nrow = PLOTS, ncol = 2) 

for(row in 1:PLOTS){
  
  for(col in 1:BINS){
    
    # weight col
    wcol <- BINS + col
    # get distance in bin
    d <- DONUTS[col]
    # name of file
    x <- get(paste0("intersect_result_diff_", DONUTS[col]))
    # plot
    MATCHSTORED[[row]][col] <- x[row]
    # weight
    MATCHSTORED[[row]][wcol] <- WEIGHTS[col]
    rm(x)
    # plots
    MATCH_WEIGHT[[row]][1] <- append(unlist(MATCH_WEIGHT[[row]][1]), unlist(MATCHSTORED[[row]][col])) %>% list()
    times <- length(unlist(MATCHSTORED[[row]][col]))
    # weights
    MATCH_WEIGHT[[row]][2] <- append(unlist(MATCH_WEIGHT[[row]][2]), unlist(rep(MATCHSTORED[[row]][wcol], times))) %>% list()
    
  } 
} 

# remove files to save memory
rm(MATCHSTORED)
rm(list=ls(pattern="^intersect_"))

  # LNG in year-buffer
  for(year in c(listyears)){
    
    # keep track of progress
    print(paste0("Processing year ", year, "; ", buff, "m buffer;"))
    print(Sys.time())
    
    # subset year
    NumDataYear <- NumData %>% subset(NumData$YearConst <= year)
    SHP_select <- PlotShp_hs_apt %>% subset(rownum %in% unique(NumDataYear$rownum))
    nrows <- nrow(SHP_select)

    if(nrows > 0){
      
      # Loop through the reference parcels
      RelIneqData <- foreach(name=rownames(SHP_select), .combine=rbind, .inorder=FALSE) %dopar% {
       
        # Get the entire row of the reference plot
        row_data <- SHP_select[name,]
        
        # this is the row identifier in the shape file (to link with REFCAT)
        intersection_id <- row_data$rownum
 
        # polygon identifier in the shape file
        REFCAT <- row_data$REFCAT
        
        # Get the intersection list parcels for the reference parcel
        intersection_list <- MATCH_WEIGHT[intersection_id][[1]] 
        
        # Numerical Data intersected with reference polygon
        NumData_pols_in_buff <- NumDataYear %>% 
                               subset(rownum %in% intersection_list[[1]])
        
        ## weights
        NumData_pols_in_buff$Weight <- 0
        
        # If Kernel is uniform, all plots have the same weight
        if(KERNEL == "UNIFORM"){
          
          NumData_pols_in_buff$Weight <- 1
        
        # If Kernel is not Uniform, assign specific weights
        }else{
 
         for(i in intersection_list[[1]]){
          
            pos <- which(intersection_list[[1]]==i)
            NumData_pols_in_buff$Weight[NumData_pols_in_buff$rownum == intersection_list[[1]][pos]] <- intersection_list[[2]][pos]
          }
        } 
        
        # Calculate the distribution and the results to the dataframe
        # x is the list of values in neighboring polygons
        x <- NumData_pols_in_buff$Val
        NumData_pols_in_buff$Pctile <- ecdf(x)(x)
        
        # Select the reference parcel along with the columns of interest
        RelIneqOut <- NumData_pols_in_buff %>% 
                    dplyr::select(c("PlotCode", "PropOrder2", "Val", "Pctile")) %>%
                    subset(PlotCode == REFCAT)
      
        # Add the columns and the values
        RelIneqOut$Neighbors <- nrow(NumData_pols_in_buff)-1
        
        # Compute Gini/Weighted Gini in local neighborhood
        RelIneqOut$Gini <-  gini(x = NumData_pols_in_buff$Val)
        RelIneqOut$WGini <-  gini(x = NumData_pols_in_buff$Val, weights = NumData_pols_in_buff$Weight)
        
        # Percentiles and other neighborhood of reference characteristics
        RelIneqOut$p01 <- quantile(NumData_pols_in_buff$Val, probs = c(0.01))
        RelIneqOut$p05 <- quantile(NumData_pols_in_buff$Val, probs = c(0.05))
        RelIneqOut$p10 <- quantile(NumData_pols_in_buff$Val, probs = c(0.1))
        RelIneqOut$p25 <- quantile(NumData_pols_in_buff$Val, probs = c(0.25))
        RelIneqOut$p50 <- quantile(NumData_pols_in_buff$Val, probs = c(0.5))
        RelIneqOut$p75 <- quantile(NumData_pols_in_buff$Val, probs = c(0.75))
        RelIneqOut$p90 <- quantile(NumData_pols_in_buff$Val, probs = c(0.9))
        RelIneqOut$p95 <- quantile(NumData_pols_in_buff$Val, probs = c(0.95))
        RelIneqOut$p99 <- quantile(NumData_pols_in_buff$Val, probs = c(0.99))
        RelIneqOut$Year <- year
        RelIneqOut$MeanVal <- mean(NumData_pols_in_buff$Val)
        
        # results
        return(RelIneqOut)
        
      } 
    }
    
    # store data
    if(TYPE == "SPACE"){
      
      if(KERNEL == "UNIFORM"){
        filename <- paste0(dirlng, "RelIneq_", city, "_", year, "_buff_", buff)
      }else if(KERNEL == "TRIANGULAR"){
        filename <- paste0(dirlng, "RelIneqW_", city, "_", year, "_buff_", buff)
      }
      
    }else if(TYPE == "VALUE"){
      
      if(KERNEL == "UNIFORM"){
        filename <- paste0(dirlng, "RelIneqVal_", city, "_", year, "_buff_", buff)
      }else if(KERNEL == "TRIANGULAR"){
        filename <- paste0(dirlng, "RelIneqWVal_", city, "_", year, "_buff_", buff)
      }

    }

    save(RelIneqData, file = paste0(filename, ".RData"))
    rm(RelIneqData)

  } 
} 
} 
# -------------------------------------------------------------------- #
# part 2: put together -- personal computer  ####
# -------------------------------------------------------------------- #
listyears <- c(firstyear, 2018, 2017, 2016, 2015)
listbuff <- c(firstbuff, 200, 350, 500, 750, 1000)

for(TYPE in c("VALUE","SPACE")){
  
for(city in listcities){ 
  for(year in c(listyears)){
    for(buff in c(listbuff)){

      # name of specific file
      if(TYPE == "SPACE"){
        if(KERNEL == "UNIFORM"){
          filename <- paste0(dirlng,"RelIneq_", city, "_", year, "_buff_", buff, ".RData")
        }else if(KERNEL == "TRIANGULAR"){
          filename <- paste0(dirlng, "RelIneqW_", city, "_", year, "_buff_", buff, ".RData")
        }
      }else if(TYPE == "VALUE"){
        if(KERNEL == "UNIFORM"){
          filename <- paste0(dirlng, "RelIneqVal_", city, "_", year, "_buff_", buff, ".RData")
        }else if(KERNEL == "TRIANGULAR"){
          filename <- paste0(dirlng, "RelIneqWVal_", city, "_", year, "_buff_", buff, ".RData")
        }
      }
  
  if(file.exists(filename)){ 
    
  # load file
  load(filename)
  
  # number of units in parcel
  grcount <-  RelIneqData %>% count(PlotCode)
  RelIneqData <- merge(x=RelIneqData, y=grcount, by="PlotCode") %>%
    distinct(PlotCode, .keep_all = TRUE)
  rm(grcount)
  
  RelIneqData <- RelIneqData[!names(RelIneqData) %in% c("PropOrder2","Val","Pctile")] 
  names(RelIneqData)[names(RelIneqData)=="n"] <- "Properties"
  RelIneqData$Buffer <- buff
  
  # append data with different buffers  
  if(buff == firstbuff & year == firstyear){ 
    RelIneqDataBuffYears <- RelIneqData
  }else{ 
    RelIneqDataBuffYears <- rbind(RelIneqDataBuffYears, RelIneqData)
  }
  
  # remove year file
  rm(RelIneqData)
  
    } 
    } 
  } 
  
  # store one file with all the years
  if(TYPE == "SPACE"){

    if(KERNEL == "UNIFORM"){
      filename <- paste0(dirdata,"RelIneqBuffYears_", city)
    }else if(KERNEL == "TRIANGULAR"){
      filename <- paste0(dirdata,"RelIneqWBuffYears_", city)
    }
    
    write_dta(data = RelIneqDataBuffYears, paste0(filename, ".dta"))

  }else if(TYPE == "VALUE"){

    if(KERNEL == "UNIFORM"){
      filename <- paste0(dirdata,"RelIneqValBuffYears_", city)
    }else if(KERNEL == "TRIANGULAR"){
      filename <- paste0(dirdata,"RelIneqWValBuffYears_", city)
    }
    
    write_dta(data = RelIneqDataBuffYears, paste0(filename, ".dta"))

  }
}
} 
# -------------------------------------------------------------------- #
# closing ####
# -------------------------------------------------------------------- #
rm(list = ls())
